#
#   Example - Python scanner
#

import exceptions
from plex import *

class NaughtyNaughty(Exception):
    """Error raised by the scanner for inconsistent or invalid indentation.

    Derives from the builtin ``Exception`` (the same class object that the
    Python 2 ``exceptions`` module exposes), so existing ``except Exception``
    and ``except NaughtyNaughty`` handlers keep working.
    """

class PythonScanner(Scanner):
    """Example Plex scanner that tokenizes Python-like source text.

    In addition to ordinary tokens (names, numbers, strings, punctuation,
    brackets, newlines) it emits 'INDENT' and 'DEDENT' tokens derived from
    the leading whitespace of each logical line.

    Instance state (set up in __init__):
      indentation_stack     -- indentation widths of the enclosing blocks;
                               the bottom entry is always 0.
      bracket_nesting_level -- count of currently open ( [ { brackets.
      indentation_char      -- first indentation character seen (space or
                               tab), or None before any indentation is seen.
    """

    def open_bracket_action(self, text):
        """Count one more open bracket and return the bracket text as the token."""
        self.bracket_nesting_level += 1
        return text

    def close_bracket_action(self, text):
        """Count one fewer open bracket and return the bracket text as the token.

        NOTE: no check is made for unbalanced closing brackets, so the
        nesting level can become negative.
        """
        self.bracket_nesting_level -= 1
        return text

    def current_level(self):
        """Return the indentation width of the innermost open block."""
        return self.indentation_stack[-1]

    def newline_action(self, text):
        """Handle a line terminator.

        Outside brackets, switch to the 'indent' state so the next line's
        leading whitespace is examined, and return 'newline'. Inside
        brackets nothing is returned (implicitly None) and the scanner
        state is left unchanged.
        """
        if self.bracket_nesting_level == 0:
            self.begin('indent')
            return 'newline'

    def indentation_action(self, text):
        """Process the leading whitespace of a line.

        text is the matched run of indentation characters (possibly empty).
        Raises NaughtyNaughty if the indentation character differs from the
        one first seen, or (via dedent_to) if a dedent does not line up with
        an enclosing block.
        """
        # Check that tabs and spaces are being used consistently.
        if text:
            c = text[0]
            if self.indentation_char is None:
                # First indentation seen: remember which character is in use.
                self.indentation_char = c
            elif self.indentation_char != c:
                raise NaughtyNaughty("Mixed up tabs and spaces!")

        # Figure out how many indents/dedents to do
        current_level = self.current_level()
        new_level = len(text)
        if new_level > current_level:
            self.indent_to(new_level)
        elif new_level < current_level:
            self.dedent_to(new_level)
        # Change back to default state
        self.begin('')

    def indent_to(self, new_level):
        """Open a new block at width new_level and produce an 'INDENT' token."""
        self.indentation_stack.append(new_level)
        self.produce('INDENT', '')

    def dedent_to(self, new_level):
        """Close blocks deeper than new_level, producing one 'DEDENT' per block.

        Raises NaughtyNaughty if, after closing, new_level does not equal
        the width of the block that is now innermost.
        """
        while new_level < self.current_level():
            self.indentation_stack.pop()
            self.produce('DEDENT', '')

        if new_level != self.current_level():
            raise NaughtyNaughty("Indentation booboo!")

    def eof(self):
        """Close every block still open by dedenting to width 0.

        Presumably invoked by the Plex Scanner when input is exhausted --
        confirm against the Plex Scanner documentation.
        """
        self.dedent_to(0)

    # ---- Token patterns -------------------------------------------------

    letter = Range("AZaz") | Any("_")
    digit = Range("09")
    hexdigit = Range("09AFaf")

    name = letter + Rep(letter | digit)
    number = Rep1(digit) | (Str("0x") + Rep1(hexdigit))

    # Single-line string literals: anything except backslash, newline and
    # the closing quote, or a backslash followed by any character.
    sq_string = (
        Str("'") +
        Rep(AnyBut("\\\n'") | (Str("\\") + AnyChar)) +
        Str("'"))

    dq_string = (
        Str('"') +
        Rep(AnyBut('\\\n"') | (Str("\\") + AnyChar)) +
        Str('"'))

    # Triple-double-quoted string: the body is built from non-quote
    # characters (or backslash escapes), optionally preceded by one or two
    # double quotes, so three quotes in a row can only match the terminator.
    non_dq = AnyBut('"') | (Str('\\') + AnyChar)
    tq_string = (
        Str('"""') +
        Rep(
            non_dq |
            (Str('"') + non_dq) |
            (Str('""') + non_dq)) + Str('"""'))

    stringlit = sq_string | dq_string | tq_string
    opening_bracket = Any("([{")
    closing_bracket = Any(")]}")
    punct1 = Any(":,;+-*/|&<>=.%`~^")
    punct2 = Str("==", "<>", "!=", "<=", "<<", ">>", "**")
    punctuation = punct1 | punct2

    spaces = Rep1(Any(" \t"))
    # Indentation is a run of spaces or a run of tabs, never a mixture.
    indentation = Rep(Str(" ")) | Rep(Str("\t"))
    lineterm = Str("\n") | Eof
    escaped_newline = Str("\\\n")
    comment = Str("#") + Rep(AnyBut("\n"))
    # A line holding only indentation and an optional comment.
    blank_line = indentation + Opt(comment) + lineterm

    # The action functions referenced below are the plain functions defined
    # earlier in this class body, so they must be defined before this point.
    lexicon = Lexicon([
        (name,            'name'),
        (number,          'number'),
        (stringlit,       'string'),
        (punctuation,     TEXT),
        (opening_bracket, open_bracket_action),
        (closing_bracket, close_bracket_action),
        (lineterm,        newline_action),
        (comment,         IGNORE),
        (spaces,          IGNORE),
        (escaped_newline, IGNORE),
        # State entered at the start of each logical line (see
        # newline_action and __init__).
        State('indent', [
            (blank_line,    IGNORE),
            (indentation,   indentation_action),
        ]),
    ])

    def __init__(self, file):
        """Create a scanner reading from the open file object `file`.

        Scanning starts in the 'indent' state so that the first line's
        indentation is processed like that of any other line.
        """
        Scanner.__init__(self, self.lexicon, file)
        self.indentation_stack = [0]
        self.bracket_nesting_level = 0
        self.indentation_char = None
        self.begin('indent')

# Driver: tokenize "python.in" and print one token per line, indented by
# four spaces for every block currently open (tracked via INDENT/DEDENT).
f = open("python.in", "r")
try:
    scanner = PythonScanner(f)
    level = 0
    while True:
        token, text = scanner.read()
        if token is None:
            # The scanner reports end of input with a None token.
            break
        if token == 'INDENT':
            level = level + 1
        elif token == 'DEDENT':
            level = level - 1

        indent = ' ' * (level * 4)
        # Show just the token when it carries no text or is its own text
        # (e.g. punctuation); otherwise show token(repr-of-text).
        if not text or token == text:
            value = token
        else:
            value = "%s(%s)" % (token, repr(text))

        # Parenthesised single-argument form prints identically under the
        # Python 2 print statement.
        print(indent + value)
finally:
    # Release the input file even if scanning raises.
    f.close()
